root.dir <- "~/OneDrive/projects/"
source(paste0(root.dir,"R.utils/RNASEQ.utils.R"))
project.dir <- paste0(root.dir,"PDX/")
source(paste0(project.dir,"src/load.PDX.R"))
rnaseq <- get.PDX()
Read 0.0% of 113262 rows
Read 44.1% of 113262 rows
Read 88.3% of 113262 rows
Read 113262 rows and 749 (of 749) columns from 0.176 GB file in 00:00:06
meta <- load.meta()
meta <- subset.meta(meta,meta[,project=="metastasis" & patient=="AB861"])
rnaseq <- subset.PDX(rnaseq, rnaseq$meta[,Sample.name] %in% meta[,Sample.name])
hpa.breast <- fread(paste0(root.dir,"shared.data/human.protein.atlas/tissue_specificity_rna_breast.tsv"),check.names = T)
hpa.lymph <- fread(paste0(root.dir,"shared.data/human.protein.atlas/tissue_specificity_rna_lymph.tsv"),check.names = T)
hpa.gene.list <- hpa.lymph[!(Gene %in% hpa.breast$Gene),.(Gene,Ensembl)]
read.depth.plots(rnaseq$meta)
There appears to be some variation across the samples as to the mouse fraction. Does this correlate to the passage number?
x <- melt(rnaseq$meta,id.vars=c("Sample.name","Flowcell","Lane","Pool"),measure.vars = c("mouse.library.size","human.library.size"))
x <- merge(x,meta)[,.(tissue.grafted,tissue.sampled,variable,value/sum(value)),Sample.name][variable=="mouse.library.size"]
ggplot(x) + aes(x=tissue.grafted,y=V4,colour=tissue.sampled) + geom_boxplot() + theme(axis.text.x=element_text(angle=90,hjust=1))
While x4 samples do have a larger mouse fraction than others, the x3 samples have less than either the x2 or x1 samples indicating that it isn’t a clear trend. Also the samples taken from the patient show little to no mouse fraction as would be expected (a good sanity check).
No big issues –> proceeding to merge lanes and meta
rnaseq <- merge.PDX(rnaseq)
rnaseq$meta <- merge(rnaseq$meta,meta,sort=F)
rnaseq$meta[,merged.lanes:=abbreviate(merged.lanes)]
Before we tackle the above, let’s perform a few quick pca’s.
pca <- pca.run(rnaseq$human)
summary(pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
Standard deviation 26.3071 12.4314 8.02369 4.04841 3.19778 3.04445 2.52645 2.52160 2.35886 2.07895 1.86536 1.75551 1.58955 1.5159 1.42245 1.39556
Proportion of Variance 0.6921 0.1545 0.06438 0.01639 0.01023 0.00927 0.00638 0.00636 0.00556 0.00432 0.00348 0.00308 0.00253 0.0023 0.00202 0.00195
Cumulative Proportion 0.6921 0.8466 0.91098 0.92737 0.93760 0.94687 0.95325 0.95961 0.96517 0.96950 0.97298 0.97606 0.97858 0.9809 0.98291 0.98485
PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32
Standard deviation 1.31009 1.27388 1.24328 1.13470 1.07615 1.05624 1.03049 0.93928 0.89966 0.88765 0.87188 0.83051 0.79480 0.72142 0.56480 0.48835
Proportion of Variance 0.00172 0.00162 0.00155 0.00129 0.00116 0.00112 0.00106 0.00088 0.00081 0.00079 0.00076 0.00069 0.00063 0.00052 0.00032 0.00024
Cumulative Proportion 0.98657 0.98819 0.98974 0.99103 0.99218 0.99330 0.99436 0.99524 0.99605 0.99684 0.99760 0.99829 0.99892 0.99944 0.99976 1.00000
PC33
Standard deviation 1.168e-14
Proportion of Variance 0.000e+00
Cumulative Proportion 1.000e+00
pca <- cbind(rnaseq$meta,pca$x)
ggplot(pca) + aes(x=PC1,y=PC2,col=tissue.sampled,shape=xenograft) + geom_point(size=5)
The human samples are clearly very differet to the rest. However, what is causing the grouping between the xenograft samples?
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft])
pca <- pca.run(x$human)
summary(pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
Standard deviation 24.9891 9.29872 6.68410 6.12936 5.78557 5.16967 4.66610 3.9367 3.84352 3.44039 2.90136 2.8459 2.7384 2.57268 2.53240 2.48157
Proportion of Variance 0.6244 0.08647 0.04468 0.03757 0.03347 0.02673 0.02177 0.0155 0.01477 0.01184 0.00842 0.0081 0.0075 0.00662 0.00641 0.00616
Cumulative Proportion 0.6244 0.71092 0.75560 0.79317 0.82664 0.85336 0.87514 0.8906 0.90541 0.91724 0.92566 0.9338 0.9413 0.94788 0.95429 0.96045
PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
Standard deviation 2.25566 2.23810 2.11002 1.96986 1.84778 1.7883 1.73908 1.64565 1.5152 1.47298 1.43199 1.09049 1.03512 1.11e-14
Proportion of Variance 0.00509 0.00501 0.00445 0.00388 0.00341 0.0032 0.00302 0.00271 0.0023 0.00217 0.00205 0.00119 0.00107 0.00e+00
Cumulative Proportion 0.96554 0.97055 0.97500 0.97888 0.98229 0.9855 0.98852 0.99122 0.9935 0.99569 0.99774 0.99893 1.00000 1.00e+00
pca <- cbind(x$meta,pca$x)
ggplot(pca) + aes(x=PC1,y=PC2,col=tissue.grafted,shape=exp.protocol) + geom_point(size=5)
ggplot(pca) + aes(x=PC1,y=PC2,col=tissue.grafted,shape=tissue.sampled) + geom_point(size=5)
ggplot(pca) + aes(x=PC1,y=PC2,col=tissue.grafted,shape=merged.lanes) + geom_point(size=5)
We note that there is clustering based on the engrafted tissue and the experimental protocol. However, since these latter clusterings are not independent from one another, we cannot discern if the major transcriptomic change after the initial two passages is caused by changes in the tumor (tissue.grafted) or the changed protocol. Additional samples should be sequenced to try discern this. Note that this point in the sequential passages is the first time a mouse met is reimplanted.
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & !(tissue.grafted %in% c("AB861M","AB861M-XT1-XM1-XT1")) & tissue.sampled=="LN"])
tissue.grafted <- make.names(x$meta$tissue.grafted)
d <- model.matrix(~ 0 + tissue.grafted)
cont.matrix <- makeContrasts(
G2vsG1 = tissue.graftedAB861M.XT1.XM1 - tissue.graftedAB861M.XT1,
G3vsG2 = tissue.graftedAB861M.XT1.XM1.XM1 - tissue.graftedAB861M.XT1.XM1,
levels = colnames(d))
#Perform DE
DE.data <- DE.run(x$human,d,2,cont.matrix = cont.matrix)
DE <- decideTest.annotated.DE(DE.data$efit)
'select()' returned 1:many mapping between keys and columns
vennDiagram.paired(DE[,2:3])
DE <- DE[abs(rowSums(DE[,2:3]))>=2]
DE
#Pheatmap w/ batch correction
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
labels.col <- rep("",nrow(x$meta))
ann.col <- data.frame(Generation=x$meta[,tissue.grafted])
labels.row <- DE$SYMBOL
rownames(ann.col) <- x$meta[,Sample.name]
levels(ann.col$Generation) <- c("G1","G2","G3")
j <- order(ann.col$Generation)
cols <- hue_pal()(4)[1:3]
names(cols) <- levels(ann.col$Generation)
pheatmap(DE.data$v$E[i,j],labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col[j,,drop=F],scale="row",cluster_cols = F,annotation_colors = list(Generation=cols))
#Pheatmap with tissue.graftedAB861M.XT1.XM1.XT1 added
y <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & !(tissue.grafted %in% c("AB861M")) & tissue.sampled=="LN"])
z <- DGEList(y$human[rowSums(cpm(y$human) > cpm.threshold(y$human)) >= 2,])
z <- calcNormFactors(z,method="TMM")
z <- voom(z,y$meta[,model.matrix(~ 0 + tissue.grafted)])
i <- match(DE$ENSEMBL,rownames(z))
labels.col <- rep("",nrow(y$meta))
ann.col <- data.frame(Generation=y$meta[,tissue.grafted])
labels.row <- DE$SYMBOL
rownames(ann.col) <- y$meta[,Sample.name]
levels(ann.col$Generation) <- c("G1","G2","G3","G2x1")
cols <- hue_pal()(4)
names(cols) <- levels(ann.col$Generation)
pheatmap(z[i,],labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row",annotation_colors = list(Generation=cols))
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & !(tissue.grafted %in% c("AB861M","AB861M-XT1-XM1-XT1")) & tissue.sampled=="T"])
tissue.grafted <- make.names(x$meta$tissue.grafted)
d <- model.matrix(~ 0 + tissue.grafted)
cont.matrix <- makeContrasts(
G2vsG1 = tissue.graftedAB861M.XT1.XM1 - tissue.graftedAB861M.XT1,
G3vsG2 = tissue.graftedAB861M.XT1.XM1.XM1 - tissue.graftedAB861M.XT1.XM1,
levels = colnames(d))
#Perform DE
DE.data <- DE.run(x$human,d,2,cont.matrix = cont.matrix)
DE <- decideTest.annotated.DE(DE.data$efit)
'select()' returned 1:many mapping between keys and columns
vennDiagram.paired(DE[,2:3])
DE <- DE[abs(rowSums(DE[,2:3]))==2]
DE
#Pheatmap w/ batch correction
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
labels.col <- rep("",nrow(x$meta))
ann.col <- data.frame(Generation=x$meta[,tissue.grafted])
labels.row <- DE$SYMBOL
rownames(ann.col) <- x$meta[,Sample.name]
levels(ann.col$Generation) <- c("G1","G2","G3")
j <- order(ann.col$Generation)
cols <- hue_pal()(4)[1:3]
names(cols) <- levels(ann.col$Generation)
pheatmap(DE.data$v$E[i,j],labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col[j,,drop=F],scale="row",cluster_cols = F,annotation_colors = list(Generation=cols))
#Pheatmap with tissue.graftedAB861M.XT1.XM1.XT1 added
y <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & !(tissue.grafted %in% c("AB861M")) & tissue.sampled=="T"])
z <- DGEList(y$human[rowSums(cpm(y$human) > cpm.threshold(y$human)) >= 2,])
z <- calcNormFactors(z,method="TMM")
z <- voom(z,y$meta[,model.matrix(~ 0 + tissue.grafted)])
i <- match(DE$ENSEMBL,rownames(z))
labels.col <- rep("",nrow(y$meta))
ann.col <- data.frame(Generation=y$meta[,tissue.grafted])
labels.row <- DE$SYMBOL
rownames(ann.col) <- y$meta[,Sample.name]
levels(ann.col$Generation) <- c("G1","G2","G3","G2x1")
cols <- hue_pal()(4)
names(cols) <- levels(ann.col$Generation)
pheatmap(z[i,],labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row",annotation_colors = list(Generation=cols))
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & tissue.grafted!="AB861M"])
tissue.sampled <- factor(make.names(x$meta$tissue.sampled),levels = c("T","LN"))
tissue.grafted <- make.names(x$meta$tissue.grafted)
mouse.id <- make.names(x$meta$mouse)
exp.protocol <- make.names(x$meta$exp.protocol)
merged.lanes <- make.names(x$meta$merged.lanes)
d <- model.matrix(~ 0 + tissue.sampled + tissue.grafted + mouse.id)
d <- d[,!(colnames(d) %in% c("mouse.idX45241","mouse.idX48803","mouse.idX48898"))]
cont.matrix <- makeContrasts(
LNvsT = tissue.sampledLN - tissue.sampledT,
levels = colnames(d))
rownames(cont.matrix) <- colnames(d)
#Perform DE
DE.data <- DE.run(x$human,d,2,cont.matrix = cont.matrix)
#Top DE genes table
DE <- topTable.annotated.DE(DE.data$efit,p.val = 0.01)
'select()' returned 1:1 mapping between keys and columns
DE <- DE[!(ENSEMBL %in% hpa.gene.list$Ensembl)]
if(nrow(DE)!=0){
print(DE[direction=="up.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
print(DE[direction=="down.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
y <- removeBatchEffect(DE.data$v$E[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
labels.col <- rep("",nrow(x$meta))
labels.row <- DE$SYMBOL
ann.col <- as.data.frame(x$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- x$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
#Pheatmap w/o batch correction
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
y <- DE.data$v$E[i,]
labels.col <- rep("",nrow(x$meta))
labels.row <- DE$SYMBOL
ann.col <- as.data.frame(x$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- x$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
#GSEA - c6
gsea <- GSEA.run(DE.data$v,d,cont.matrix,Hs.c6,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
#GSEA - c2
gsea <- GSEA.run(DE.data$v,d,cont.matrix,Hs.c2,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
# ssGSEA
ssGSEA.data <- ssGSEA.run(DE.data$v$E,Hs.c2,d,cont.matrix = cont.matrix)
'select()' returned 1:many mapping between keys and columns
The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.
Estimating GSVA scores for 4472 gene sets.
Computing observed enrichment scores
Estimating ECDFs in microarray data with Gaussian kernels
Using parallel with 4 cores
|
| | 0%
#Top DE gene.sets table
ssGSEA <- topTable.annotated.ssGSEA(ssGSEA.data$efit)
if(nrow(ssGSEA)!=0){
print(ssGSEA[direction=="up.reg",.(direction,gene.set,adj.P.Val,logFC)])
print(ssGSEA[direction=="down.reg",.(direction,gene.set,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
i <- match(ssGSEA$gene.set,rownames(ssGSEA.data$w))
y <- removeBatchEffect(ssGSEA.data$w[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
labels.col <- rep("",nrow(rnaseq$meta))
labels.row <- ssGSEA$gene.set
ann.col <- as.data.frame(rnaseq$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- rnaseq$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
d <- model.matrix(~ 0 + tissue.grafted + tissue.sampled + mouse.id)
d <- d[,!(colnames(d) %in% c("mouse.idX45241","mouse.idX48803","mouse.idX48898"))]
cont.matrix <- makeContrasts(
G2vsG1 = tissue.graftedAB861M.XT1.XM1 - tissue.graftedAB861M.XT1,
G3vsG2 = tissue.graftedAB861M.XT1.XM1.XM1 - tissue.graftedAB861M.XT1.XM1,
levels = colnames(d))
rownames(cont.matrix) <- colnames(d)
vfit <- lmFit(DE.data$v, d)
vfit <- contrasts.fit(vfit, contrasts = cont.matrix)
efit <- eBayes(vfit)
DE.sub <- decideTest.annotated.DE(efit)
'select()' returned 1:many mapping between keys and columns
table(DE.sub[,2:3])
G3vsG2
G2vsG1 -1 0 1
-1 1 229 23
0 74 0 240
1 0 30 2
vennDiagram.paired(DE.sub[,2:3])
DE.sub[abs(rowSums(DE.sub[,2:3]))==2,]
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
vfit <- lmFit(DE.data$v[i,], d)
vfit <- contrasts.fit(vfit, contrasts = cont.matrix)
efit <- eBayes(vfit)
DE.sub <- decideTest.annotated.DE(efit)
'select()' returned 1:1 mapping between keys and columns
table(DE.sub[,2:3])
G3vsG2
G2vsG1 -1 0 1
-1 0 18 3
0 1 0 2
vennDiagram.paired(DE.sub[,2:3])
DE.sub[abs(rowSums(DE.sub[,2:3]))==2,]
y <- subset.PDX(rnaseq,rnaseq$meta[,is.na(tissue.grafted)])$human
y <- log(t(1e6*t(y) / (colSums(y)*calcNormFactors(y))+0.5))
up.reg <- apply(y[rownames(y) %in% DE[direction=="up.reg",ENSEMBL],],1,function(y)y[3]-y[2])
down.reg <- apply(y[rownames(y) %in% DE[direction=="down.reg",ENSEMBL],],1,function(y)y[3]-y[2])
wilcox.test(up.reg,down.reg,alternative = "g")
Wilcoxon rank sum test with continuity correction
data: up.reg and down.reg
W = 500, p-value = 0.06827
alternative hypothesis: true location shift is greater than 0
z <- rbind(data.table(direction="up",LFC=up.reg,ENSEMBL=names(up.reg)),
data.table(direction="down",LFC=down.reg,ENSEMBL=names(down.reg)))
ggplot(z) + aes(x=LFC,fill=direction) + geom_density(alpha=0.5)
z <- merge(z,DE,by="ENSEMBL")
table(z[,.(direction.x,direction.human=LFC>0)])
direction.human
direction.x FALSE TRUE
down 34 16
up 8 8
z[direction.x=="up",.(ENSEMBL,SYMBOL,LFC,direction.x,LFC>0)][order(-LFC)]
z[direction.x=="down",.(ENSEMBL,SYMBOL,LFC,direction.x,LFC>0)][order(LFC)]
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & tissue.grafted!="AB861M"])
tissue.sampled <- factor(make.names(x$meta$tissue.sampled),levels = c("T","LN"))
tissue.grafted <- make.names(x$meta$tissue.grafted)
mouse.id <- make.names(x$meta$mouse)
exp.protocol <- make.names(x$meta$exp.protocol)
merged.lanes <- make.names(x$meta$merged.lanes)
d <- model.matrix(~ 0 + tissue.sampled * tissue.grafted + mouse.id + merged.lanes)
d <- d[,!(colnames(d) %in% c("mouse.idX45241","mouse.idX48803","mouse.idX48898"))]
cont.matrix <- matrix(c(-1,1,rep(0,dim(d)[2]-2)))
rownames(cont.matrix) <- colnames(d)
#Perform DE
DE.data <- DE.run(x$human,d,2,cont.matrix = cont.matrix)
#Top DE genes table
DE <- topTable.annotated.DE(DE.data$efit,p.val = 0.05)
'select()' returned 1:1 mapping between keys and columns
DE <- DE[!(ENSEMBL %in% hpa.gene.list$Ensembl)]
if(nrow(DE)!=0){
print(DE[direction=="up.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
print(DE[direction=="down.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
#i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
#y <- removeBatchEffect(DE.data$v$E[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
#labels.row <- DE$SYMBOL
#labels.col <- rep("",nrow(x$meta))
#ann.col <- as.data.frame(x$meta[,.(tissue.sampled,tissue.grafted)])
#rownames(ann.col) <- x$meta[,Sample.name]
#pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
#GSEA - c6
gsea <- GSEA.run(DE.data$v,d,cont.matrix,Hs.c6,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
#GSEA - c2
gsea <- GSEA.run(DE.data$v,d,cont.matrix,Hs.c2,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
# ssGSEA
ssGSEA.data <- ssGSEA.run(DE.data$v$E,Hs.c2,d,cont.matrix = cont.matrix)
'select()' returned 1:many mapping between keys and columns
The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.
Estimating GSVA scores for 4472 gene sets.
Computing observed enrichment scores
Estimating ECDFs in microarray data with Gaussian kernels
Using parallel with 4 cores
|
| | 0%
#Top DE gene.sets table
ssGSEA <- topTable.annotated.ssGSEA(ssGSEA.data$efit)
No DE genes
if(nrow(ssGSEA)!=0){
print(ssGSEA[direction=="up.reg",.(direction,gene.set,adj.P.Val,logFC)])
print(ssGSEA[direction=="down.reg",.(direction,gene.set,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
i <- match(ssGSEA$gene.set,rownames(ssGSEA.data$w))
y <- removeBatchEffect(ssGSEA.data$w[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
labels.col <- rep("",nrow(rnaseq$meta))
labels.row <- ssGSEA$gene.set
ann.col <- as.data.frame(rnaseq$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- rnaseq$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
Note: only using paired samples
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & mouse %in% rnaseq$meta[,.N,mouse][N==2,mouse]])
tissue.sampled <- factor(make.names(x$meta$tissue.sampled),levels = c("T","LN"))
tissue.grafted <- make.names(x$meta$tissue.grafted)
mouse.id <- make.names(x$meta$mouse)
exp.protocol <- make.names(x$meta$exp.protocol)
merged.lanes <- make.names(x$meta$merged.lanes)
d <- model.matrix(~ tissue.sampled:tissue.grafted + mouse.id)
d <- d[,!(colnames(d) %in% c("tissue.sampledT:tissue.graftedAB861M.XT1","tissue.sampledT:tissue.graftedAB861M.XT1.XM1","tissue.sampledT:tissue.graftedAB861M.XT1.XM1.XM1","tissue.sampledT:tissue.graftedAB861M.XT1.XM1.XT1"))]
DE.data <- DE.run(x$human,d,2,coefs = 12:15)
DE <- decideTest.annotated.DE(DE.data$efit)
'select()' returned 1:1 mapping between keys and columns
vennDiagram.paired(DE[,2:5])
x <- subset.PDX(rnaseq,rnaseq$meta[,xenograft & tissue.grafted=="AB861M-XT1" & tissue.sampled=="T"])
x$meta[,mets:=!(mouse %in% c(35309,35311,35311,35312,36361))]
mets <- make.names(x$meta$mets)
d <- model.matrix(~ mets)
DE.data <- DE.run(x$human,d,2,coefs = 2)
#Top DE genes table
DE <- topTable.annotated.DE(DE.data$efit,p.val = 0.05)
No DE genes
if(nrow(DE)!=0){
DE <- DE[!(ENSEMBL %in% hpa.gene.list$Ensembl)]
print(DE[direction=="up.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
print(DE[direction=="down.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
i <- match(DE$ENSEMBL,rownames(DE.data$v$E))
y <- removeBatchEffect(DE.data$v$E[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
labels.row <- DE$SYMBOL
labels.col <- rep("",nrow(x$meta))
ann.col <- as.data.frame(x$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- x$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
#GSEA - c6
gsea <- GSEA.run(DE.data$v,d,2,Hs.c6,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
#GSEA - c2
gsea <- GSEA.run(DE.data$v,d,2,Hs.c2,0.05)
'select()' returned 1:many mapping between keys and columns
gsea[Direction=="Up",.(rn,NGenes,Direction,FDR)]
gsea[Direction=="Down",.(rn,NGenes,Direction,FDR)]
# ssGSEA
ssGSEA.data <- ssGSEA.run(DE.data$v$E,Hs.c2,d,coefs = 2)
'select()' returned 1:many mapping between keys and columns
The argument 'rnaseq' is deprecated and will be removed in the next release of GSVA. Please use the 'kcdf' argument instead.
Estimating GSVA scores for 4433 gene sets.
Computing observed enrichment scores
Estimating ECDFs in microarray data with Gaussian kernels
Using parallel with 4 cores
|
| | 0%
#Top DE gene.sets table
ssGSEA <- topTable.annotated.ssGSEA(ssGSEA.data$efit)
No DE genes
if(nrow(ssGSEA)!=0){
print(ssGSEA[direction=="up.reg",.(direction,gene.set,adj.P.Val,logFC)])
print(ssGSEA[direction=="down.reg",.(direction,gene.set,adj.P.Val,logFC)])
#Pheatmap w/ batch correction
i <- match(ssGSEA$gene.set,rownames(ssGSEA.data$w))
y <- removeBatchEffect(ssGSEA.data$w[i,],design = model.matrix(~tissue.sampled), batch= tissue.grafted)
labels.col <- rep("",nrow(rnaseq$meta))
labels.row <- ssGSEA$gene.set
ann.col <- as.data.frame(rnaseq$meta[,.(tissue.sampled,tissue.grafted)])
rownames(ann.col) <- rnaseq$meta[,Sample.name]
pheatmap(y,labels_col=labels.col,labels_row=labels.row,annotation_col = ann.col,scale="row")
}
DE.data <- DE.run(x$mouse,d,2,coefs = 2)
#Top DE genes table
DE <- topTable.annotated.DE(DE.data$efit,p.val = 0.05)
No DE genes
if(nrow(DE)!=0){
DE <- DE[!(ENSEMBL %in% hpa.gene.list$Ensembl)]
print(DE[direction=="up.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
print(DE[direction=="down.reg",.(direction,ENSEMBL,SYMBOL,adj.P.Val,logFC)])
}
sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin16.7.0 (64-bit)
Running under: macOS Sierra 10.12.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libLAPACK.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] BiocInstaller_1.28.0 pheatmap_1.0.8 ggthemes_3.4.0 ggplot2_2.2.1 org.Mm.eg.db_3.5.0 org.Hs.eg.db_3.5.0 AnnotationDbi_1.40.0
[8] IRanges_2.12.0 S4Vectors_0.16.0 Biobase_2.38.0 BiocGenerics_0.24.0 GSVA_1.26.0 edgeR_3.20.1 limma_3.34.1
[15] data.table_1.10.4-3
loaded via a namespace (and not attached):
[1] statmod_1.4.30 locfit_1.5-9.1 reshape2_1.4.2 splines_3.4.2 lattice_0.20-35 colorspace_1.3-2 htmltools_0.3.6
[8] yaml_2.1.14 blob_1.1.0 XML_3.98-1.9 rlang_0.1.4 DBI_0.7 bit64_0.9-7 RColorBrewer_1.1-2
[15] plyr_1.8.4 stringr_1.2.0 munsell_0.4.3 gtable_0.2.0 memoise_1.1.0 labeling_0.3 knitr_1.17
[22] geneplotter_1.56.0 httpuv_1.3.5 GSEABase_1.40.0 Rcpp_0.12.13 xtable_1.8-2 scales_0.5.0 graph_1.56.0
[29] annotate_1.56.1 mime_0.5 bit_1.1-12 digest_0.6.12 stringi_1.1.5 shiny_1.0.5 grid_3.4.2
[36] tools_3.4.2 bitops_1.0-6 magrittr_1.5 RCurl_1.95-4.8 lazyeval_0.2.1 shinythemes_1.1.1 tibble_1.3.4
[43] RSQLite_2.0 pkgconfig_2.0.1 assertthat_0.2.0 R6_2.2.2 compiler_3.4.2